This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
options(scipen = 999)
library(tidyverse)
library(ggplot2)
library(plotly)
library(data.table)
library(lubridate)
library(dplyr)
library(corrplot)
library(leaflet)
#Get only the Summary data for some initial analysis
summary_raw_data<-read_csv ('data/claim_summary_v1.csv' )
Error: 'data/claim_summary_v1.csv' does not exist in current working directory ('C:/User/sbussa/Personal/NSS/MidStone/Midcourse_sunitha').
#Get the data with all the required fields for analysis
#claim_detail_raw_data<-fread("data/claim_details.csv",sep = "|",fill = T)
#detail_raw_data<-read_csv ('data/claim_details.csv' )
locations<-read_csv('data/ESCo_LOCATIONS_lo.csv' )
Parsed with column specification:
cols(
location_id = [31mcol_character()[39m,
short_name = [31mcol_character()[39m,
location_address_1 = [31mcol_character()[39m,
location_address_2 = [31mcol_character()[39m,
location_city = [31mcol_character()[39m,
location_state = [31mcol_character()[39m,
location_zip_code = [32mcol_double()[39m,
latitude = [32mcol_double()[39m,
longitude = [32mcol_double()[39m
)
INITIAL ANALYSIS WITH THE SUMMARY DATA ONLY.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
ANALYSIS USING SUMMARY AND DETAIL DATA Get the data and clean it
#get detail and summary data and save it in a df
detail_raw_data<-read_csv ('data/claim_details.csv' )
Duplicated column names deduplicated: 'patient_id' => 'patient_id_1' [37]Parsed with column specification:
cols(
.default = col_double(),
esco_id = [31mcol_character()[39m,
bene_hic_num = [31mcol_character()[39m,
claim_first_dialysis_date = [31mcol_character()[39m,
claim_last_dialysis_date = [31mcol_character()[39m,
location_id = [31mcol_character()[39m,
start_date = [34mcol_datetime(format = "")[39m,
end_date = [34mcol_datetime(format = "")[39m,
esrd_date = [34mcol_datetime(format = "")[39m,
modality = [31mcol_character()[39m,
hgb_cutoff = [31mcol_character()[39m,
epo_ceiling = [31mcol_character()[39m,
route_freq = [31mcol_character()[39m,
drug_name = [31mcol_character()[39m,
ferritin_cutoff = [31mcol_character()[39m,
tsat_cutoff = [31mcol_character()[39m,
sup_name = [31mcol_character()[39m,
tx_epo = [31mcol_character()[39m,
venofer_given = [31mcol_character()[39m,
venofer_wasted = [31mcol_character()[39m,
ferrlecit_given = [31mcol_character()[39m
# ... with 42 more columns
)
See spec(...) for full column specifications.
1 parsing failure.
row col expected actual file
7272 esrd_date date like NULL 'data/claim_details.csv'
dci_data<-detail_raw_data
dci_data<-dci_data %>%
select(-esco_id,-bene_hic_num,-full_encounter,-phys_neph,-phys_hosp,-phys_ed,-ambulance,-phys_other,-esco_aligned_flag,-inpatient_fluid,-outpatient_er_fluid,-part_a_other_fluid,-patient_id_1,-dos_yyyy,-dos_mm,-esrd_date,-hgb_date,-tsat_date,-ferr_date,-albumin_date,-pth_date,-ca_date,-cca_date,-ph_date,-k_date,-urr_date,-ktv_date,-epo_given,-venofer_wasted,-inpatient_access,-outpatient_er_access,-part_a_other_access,-dci_claims,-non_dci_claims,-hha,-hospice,-dme,-start_date,-end_date,-epo_ceiling,-route_freq,-drug_name,-ferritin_cutoff,-tsat_cutoff,-hgb_cutoff,-ferrlecit_wasted,-zemplar_iv_wasted,-calcijex_iv_wasted,-feraheme_wasted,-hectorol_iv_wasted,-tx_missed,-sensipar_dispensed)
#make sure all the columns are in correct data types.
#change claims date columns to date datatype
date_columns <- c("claim_first_dialysis_date","claim_last_dialysis_date")
dci_data[date_columns] <- lapply(dci_data[date_columns],as.Date)
#change the other columns to factor
fac_columns <- c("patient_id","dos_year","location_id","dos_month","modality","tx","tx_epo","sup_name")
dci_data[fac_columns] <- lapply(dci_data[fac_columns],as.factor)
#change the below columns to logical
bool_columns <- c("epo_protocol_flag","iron_protocol_flag","nutsup_protocol_flag","hgb_exclude_flag","active_flag")
dci_data[bool_columns] <- lapply(dci_data[bool_columns],as.logical)
#Make below colums as logical true if they have any value else if they have null make it logical false.
dci_data<-dci_data %>%
mutate_at(vars("ferrlecit_given","feraheme_given","venofer_given","zemplar_iv_given","hectorol_iv_given","calcijex_iv_given","zemplar_or_given","hectorol_or_given","calcijex_iv_given","zemplar_or_given","hectorol_or_given","calcijex_or_given","activase_given","prostat_given","nepro_given","liquacel_given","has_catheter","aranesp_given","protinex_given","mircera_given","sensipar_given","parsabiv_given","protein_bar_given",
"ice_cream_given","gelatein_given"),
funs(case_when(.=="NULL" ~ FALSE,
TRUE ~ TRUE)))
funs() is soft deprecated as of dplyr 0.8.0
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
[90mThis warning is displayed once per session.[39m
#FIRST make the null VALUES in char col to zero's except for date columns
num_columns <- c("hgb","tsat","ferr","albumin","pth","ca","cca","ph","k","urr","ktv","tx_epo")
dci_data[num_columns]<-dci_data[num_columns]%>%
replace(.=="NULL","0")
#change all the character columns ot numeric
dci_data<-dci_data %>% mutate_if(is.character,as.numeric)
#names(dci_data)
#as for date columns we cannot replace na values ot 0. first make them character columns and then make rest to the na values to 0 in entire dataframe
dci_data$claim_first_dialysis_date <-as.character.Date(dci_data$claim_first_dialysis_date )
dci_data$claim_last_dialysis_date <-as.character.Date(dci_data$claim_last_dialysis_date )
#make all na values to 0 in entire dataframe
dci_data[is.na(dci_data)]<-0
# sum(is.na(dci_data))
#making the dates column back to date datatype
dci_data$claim_first_dialysis_date <- as.Date(dci_data$claim_first_dialysis_date)
dci_data$claim_last_dialysis_date <- as.Date(dci_data$claim_last_dialysis_date)
#delete the outliers( payments which are less than 1500)
dci_data<-dci_data %>%
filter(`payment`>1500) %>%
arrange(desc(payment))
#combine 2 separate part_b payments to one
dci_data<-dci_data %>%
mutate(part_b = part_b_phys + part_b_dme)
dci_data %>%
select(payment,part_a,part_b_phys,part_b_dme,part_b)
#sum(is.na(dci_data$claim_last_dialysis_date))
sum(is.na(dci_data))
[1] 220
str(dci_data)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 9970 obs. of 72 variables:
$ patient_id : Factor w/ 672 levels "30111","39732",..: 293 50 64 251 21 31 293 86 441 172 ...
$ dos_year : Factor w/ 2 levels "2017","2018": 1 1 2 1 2 2 1 2 2 2 ...
$ dos_month : Factor w/ 12 levels "1","2","3","4",..: 7 7 4 7 3 7 9 2 3 8 ...
$ claim_first_dialysis_date: Date, format: NA NA NA "2017-07-13" ...
$ claim_last_dialysis_date : Date, format: NA NA NA "2017-07-29" ...
$ payment : num 128637 122674 121244 112103 109108 ...
$ part_a : num 119013 119153 111345 110036 102586 ...
$ part_b_phys : num 9623 3521 9899 2067 6521 ...
$ part_b_dme : num 0 0 0 0 0 ...
$ inpatient : num 119013 119153 111345 108511 102586 ...
$ outpatient_dialysis : num 0 0 0 1525 0 ...
$ outpatient_er : num 0 0 0 0 0 ...
$ outpatient_other : num 0 0 0 0 0 ...
$ snf : num 0 0 0 0 0 ...
$ vasc_access : num 179 0 309 0 0 ...
$ location_id : Factor w/ 25 levels "000026","000055",..: 18 22 17 24 25 3 18 1 4 5 ...
$ modality : Factor w/ 3 levels "HH","HIC","PD": 2 2 2 2 2 2 2 2 2 2 ...
$ epo_protocol_flag : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ iron_protocol_flag : logi FALSE TRUE TRUE TRUE TRUE TRUE ...
$ nutsup_protocol_flag : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ sup_name : Factor w/ 11 levels "Body Quest Ice Cream",..: 4 5 9 4 4 11 4 11 11 1 ...
$ tx : Factor w/ 24 levels "0","1","2","3",..: 9 1 5 1 1 3 1 1 6 1 ...
$ tx_epo : Factor w/ 16 levels "0","1","10","11",..: 9 1 2 1 1 8 1 1 2 1 ...
$ venofer_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ ferrlecit_given : logi TRUE FALSE FALSE FALSE FALSE FALSE ...
$ feraheme_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ zemplar_iv_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ hectorol_iv_given : logi TRUE FALSE TRUE FALSE FALSE TRUE ...
$ calcijex_iv_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ zemplar_or_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ hectorol_or_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ calcijex_or_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ activase_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ prostat_given : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ nepro_given : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ liquacel_given : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ hgb : num 11.1 0 8.6 0 0 10.4 0 0 0 0 ...
$ tsat : num 23 39 38 23 32 56 0 0 18 50 ...
$ ferr : num 2051 1401 1591 1037 667 ...
$ albumin : num 4 0 0 0 0 3.5 0 0 0 0 ...
$ pth : num 559 577 224 0 641 ...
$ ca : num 8.7 0 0 0 0 8.7 0 0 0 0 ...
$ cca : num 8.7 0 0 0 0 9.1 0 0 0 0 ...
$ ph : num 4.2 0 0 0 0 5 0 0 0 0 ...
$ k : num 4.9 0 0 0 0 5.1 0 0 0 0 ...
$ urr : num 72 0 70 0 0 78 0 0 0 0 ...
$ ktv : num 1.45 0 1.35 0 0 1.72 0 0 0 0 ...
$ has_catheter : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ aranesp_given : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ protinex_given : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ hgb_exclude_flag : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ mircera_given : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ sensipar_given : logi FALSE FALSE TRUE FALSE TRUE TRUE ...
$ parsabiv_given : logi FALSE FALSE TRUE FALSE TRUE TRUE ...
$ protein_bar_given : logi FALSE FALSE FALSE FALSE FALSE TRUE ...
$ ice_cream_given : logi FALSE FALSE FALSE FALSE FALSE TRUE ...
$ gelatein_given : logi FALSE FALSE FALSE FALSE FALSE TRUE ...
$ active_flag : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
$ hospital_episodes : num 2 1 2 2 1 3 1 0 1 1 ...
$ hospital_admits : num 2 0 2 1 0 3 0 0 1 0 ...
$ hospital_discharges : num 1 0 1 1 0 3 0 0 0 0 ...
$ er_visits : num 0 0 0 0 0 0 0 0 1 0 ...
$ snf_episodes : num 0 0 0 0 0 1 0 1 1 0 ...
$ snf_admit : num 0 0 0 0 0 0 0 0 0 0 ...
$ snf_discharge : num 0 0 0 0 0 1 0 0 1 0 ...
$ med_orders : num 8 16 10 7 12 46 8 9 20 16 ...
$ cardio : num 2 5 2 2 3 8 2 2 1 3 ...
$ beta_blockers : num 1 1 0 1 1 1 1 1 1 1 ...
$ antihypertensives : num 0 1 0 0 0 3 0 0 0 0 ...
$ opioids : num 1 0 0 0 0 1 1 1 1 0 ...
$ antidiabetics : num 0 1 3 1 1 4 0 0 1 3 ...
$ part_b : num 9623 3521 9899 2067 6521 ...
copy the cleaned dataset into another dataset just as a back up
dci_data_ws<- dci_data
saveRDS(dci_data_ws,file="dci_data.rds")
Histograms to understand the distribution of the data for payments
dci_data_ws %>%
select (payment,part_a,part_b_phys,part_b_phys) %>%
ggplot(aes(x=payment) )+
geom_histogram(color="#e9ecef", alpha=0.6, position = 'identity', bins=40) +
scale_x_log10()+
scale_fill_manual(values=c("#69b3a2", "#404080")) +
labs(x="loggged payment values",
y="Counts",
title = "Payment distribution")
#make this interactive
p<- ggplot(dci_data_ws,aes(x=payment,y=..density..)) +
geom_histogram(bins=40,binwidth = 0.05,fill = "black",color="black",alpha=0.2) +
scale_x_log10()+
geom_density(color = "red")+
labs(x="Logged payments",
y="Frequency",
title = "Total Payments distribution")
ggplotly(p)
NA
NA
NA
#make this interactive
p<- ggplot(dci_data_ws,aes(x=payment,fill=modality)) +
geom_histogram(bins=10,binwidth = 0.25,alpha=0.8,position = "dodge") +
scale_x_log10()+
labs(x="loggged payment values",
y="Frequency",
title = "Payment distribution")
p
Modality distributions
#density distribution for modalities
ggplot(dci_data_ws,aes(payment, color = modality,fill=modality)) +
scale_x_log10()+
geom_density(alpha = 0.8,position = "dodge")
dci_data_ws %>%
select(payment,modality) %>%
group_by(modality) %>%
summarise(total_payments=sum(payment),num_of_rows=n())
#Function to create an histgram for each kind of madality HIC,HH AND PD
make_plot <- function(mod){
dci_data_ws %>%
select(payment,modality) %>%
filter(modality == mod) %>%
ggplot(aes(x=payment)) +
geom_histogram(binwith=1000) +
scale_x_log10() +
labs(x="payments",
y="frequency",
title = paste("Distribution of",mod))
}
make_plot('HH')
Ignoring unknown parameters: binwith
make_plot('HIC')
Ignoring unknown parameters: binwith
make_plot('PD')
Ignoring unknown parameters: binwith
dci_data_ws %>%
select(payment,modality) %>%
filter(modality == 'HH') %>%
ggplot(aes(x=payment)) +
geom_histogram() +
labs(x="payments",
y="frequency",
title = "Distribution of Hemo Home Payments")
NA
dci_data_ws %>%
select(payment,modality) %>%
#filter(payment>2800 & payment <3500) %>%
group_by(modality)
NA
NA
NA
NA
dci_data_ws %>%
select(payment,modality) %>%
filter(payment>2800 & payment <3500) %>%
#group_by(modality) %>%
filter(modality=='PD') %>%
ggplot(aes(x=payment)) +
geom_histogram(binwidth = 4000)
dci_data_ws %>%
select(payment,modality) %>%
filter(payment>2800 & payment <3500)
library(reshape2)
dci_long <- reshape2::melt(dci_data_ws)
Using patient_id, dos_year, dos_month, location_id, modality, epo_protocol_flag, iron_protocol_flag, nutsup_protocol_flag, sup_name, tx, tx_epo, venofer_given, ferrlecit_given, feraheme_given, zemplar_iv_given, hectorol_iv_given, calcijex_iv_given, zemplar_or_given, hectorol_or_given, calcijex_or_given, activase_given, prostat_given, nepro_given, liquacel_given, has_catheter, aranesp_given, protinex_given, hgb_exclude_flag, mircera_given, sensipar_given, parsabiv_given, protein_bar_given, ice_cream_given, gelatein_given, active_flag as id variables
attributes are not identical across measure variables; they will be dropped
ggplot(dci_long, aes(value)) + facet_wrap(~variable, scales = 'free_x') +
geom_histogram()
NA
NA
#frequency distribution with density lines
#x<- sample(0:30, 200, replace=T, prob=15 - abs(15 - 0:30))
x<-dci_data_ws$payment
## Calculate and plot the two histograms
hcum <- h <- hist(x, plot=FALSE)
hcum$counts <- cumsum(hcum$counts)
plot(hcum, main="")
plot(h, add=T, col="grey")
## Plot the density and cumulative density
d <- density(x)
lines(x = d$x, y = d$y * length(x) * diff(h$breaks)[1], lwd = 2)
lines(x = d$x, y = cumsum(d$y)/max(cumsum(d$y)) * length(x), lwd = 2)
#distribution by payment type
payment_type_dist<-dci_data_ws %>%
select(dos_year,dos_month,payment,part_a,part_b_dme,part_b_phys,part_b)%>%
pivot_longer(payment:part_b,names_to="payment_type",values_to = "payment")
saveRDS(payment_type_dist,file="payment_type_dist.rds")
#plot the fequency distribution across different payment types
pl<-dci_data_ws %>%
select(dos_year,dos_month,location_id,payment,part_a,part_b_dme,part_b_phys,part_b)%>%
pivot_longer(payment:part_b,names_to="payment_type",values_to = "payment") %>%
filter(dos_year==2017 & dos_month==1) %>%
ggplot( aes(x=payment,fill=payment_type,color= payment_type)) +
geom_histogram(bins= 40,binwidth = 0.05,alpha=0.3,position = "identity") +
scale_x_log10() +
geom_vline(aes(xintercept = mean(payment,na.rm=T)),color = "red", linetype = "dashed",size = 1) +
labs(x="Logged payments",y="Frequency",title = "distribution of payments across payment sources")
ggplotly(pl)
Transformation introduced infinite values in continuous x-axisRemoved 286 rows containing non-finite values (stat_bin).
sum(is.na(dci_data$payment))
[1] 0
#fat tailed dis.
box_pl<-payment_type_dist %>%
filter(dos_year == 2017 & dos_month == 1) %>%
ggplot(aes(y = payment,x = payment_type,fill=payment_type)) +
geom_boxplot()+
# scale_y_log10() +
theme_classic() +
labs(x="Payment type", y = "Payment",
title = "Payment distribution by payment types")
ggplotly(box_pl)
years_plot<- dci_data_ws %>%
group_by(dos_year,dos_month) %>%
summarise(sum_pay = sum(payment)) %>%
ungroup()
pl <- ggplot( years_plot,aes(y=sum_pay, x = dos_month ,fill=dos_year)) +
geom_bar(stat = "identity",position = 'dodge') +
labs(x="Months", y= "Payments") +
ggtitle("Total payments increase in each month")
pl
years_plot
NA
dci_data_ws %>%
group_by(modality) %>%
summarise(payments = sum(payment)) %>%
ggplot(aes(x=modality,y=payments,fill = `payments`)) +
geom_bar(stat="identity" )
NA
NA
#show how modalities are doing by total payments
modality_pl<-dci_data_ws %>%
group_by(dos_year,modality) %>%
summarise(payments = sum(payment)) %>%
ungroup() %>%
ggplot(aes(x=dos_year,y=payments,fill=modality,color=modality))+
geom_bar(stat="identity",position="dodge") +
geom_text(aes(label = round(payments),vjust=0),postion=position_dodge(width=5)) +
labs(x="Modalities in 2017 and 2018",y="payments",title="Payments by modality")
Ignoring unknown parameters: postion
modality_pl
dci_data_ws %>%
ggplot(aes(x=dos_year,y=payment,color = modality)) +
geom_boxplot() +
scale_y_log10() +
labs(x = "Years",y="payments",
title = "Payments in in 2017 and 2018")
correlational plot
num_columns <- c("hgb","tsat","ferr","albumin","pth","ca","cca","ph","k","urr","ktv")
corrs<-dci_data_ws %>% select(num_columns) %>%
# drop_na_() %>%
cor()
corrs
hgb tsat ferr albumin pth ca cca ph k urr ktv
hgb 1.00000000 0.0527380941 0.02570081 0.59628329 0.045803409 0.58340275 0.56441343 0.3009506342 0.4724638 0.21430294 0.247192331
tsat 0.05273809 1.0000000000 0.39986167 0.08686222 -0.027163707 0.04433107 0.03910477 0.0002180982 0.0788412 0.06797837 0.087991355
ferr 0.02570081 0.3998616659 1.00000000 0.05132422 0.005239110 0.05485813 0.05667298 -0.0513212701 0.1183593 0.20079170 0.064636183
albumin 0.59628329 0.0868622245 0.05132422 1.00000000 0.104443153 0.76304243 0.67544496 0.3994618231 0.6250713 0.31241463 0.270818252
pth 0.04580341 -0.0271637069 0.00523911 0.10444315 1.000000000 0.03478453 0.01573087 0.2973311051 0.1093904 0.02051048 0.004094452
ca 0.58340275 0.0443310656 0.05485813 0.76304243 0.034784525 1.00000000 0.98442825 0.3359978827 0.6256086 0.27040412 0.303778243
cca 0.56441343 0.0391047722 0.05667298 0.67544496 0.015730870 0.98442825 1.00000000 0.3196562589 0.6140480 0.25157942 0.306643666
ph 0.30095063 0.0002180982 -0.05132127 0.39946182 0.297331105 0.33599788 0.31965626 1.0000000000 0.4446666 0.08193608 0.076707645
k 0.47246376 0.0788412002 0.11835935 0.62507130 0.109390410 0.62560864 0.61404796 0.4446665609 1.0000000 0.34881524 0.209681668
urr 0.21430294 0.0679783689 0.20079170 0.31241463 0.020510483 0.27040412 0.25157942 0.0819360755 0.3488152 1.00000000 0.080500799
ktv 0.24719233 0.0879913547 0.06463618 0.27081825 0.004094452 0.30377824 0.30664367 0.0767076454 0.2096817 0.08050080 1.000000000
library(corrplot)
corrplot(corrs,type = "upper",order = "hclust",
tl.col="black",tl.srt=45)
num_columns <- c("payment","hgb","tsat","ferr","albumin","pth","ca","cca","ph","k","urr","ktv")
corrs<-dci_data_ws %>% select(num_columns) %>%
cor()
corrplot(corrs,type = "upper",order = "hclust",
tl.col="black",tl.srt=45)
NA
NA
tibble('variable' = corrs[1,2:12] %>% names(),'correlation' = corrs[1,2:12]) %>%
ggplot(aes(x=reorder(variable,correlation),y = correlation)) +
geom_point()+
geom_segment(aes(xend=variable,yend=0))+
coord_flip() +
geom_hline(yintercept = 0)
pl <- ggplot(dci_data_ws, aes(x=albumin,y=payment) ) +
geom_point(alpha=0.2) + geom_smooth(method = 'lm') +
scale_x_log10() +
scale_y_log10()
ggplotly(pl)
Transformation introduced infinite values in continuous x-axisTransformation introduced infinite values in continuous x-axisRemoved 195 rows containing non-finite values (stat_smooth).
#unique(dci_data_ws$ktv)
#hgb vs payment
ggplot(dci_data_ws, aes(x=hgb,y=payment) ) +
geom_point() + geom_smooth(method = 'lm') +
scale_x_log10() +
labs(x="hgb", y = "payment", title = "Payment Vs hgb")
NA
NA
ggplot(dci_data_ws, aes(x=ca,y=payment) ) +
geom_point() + geom_smooth(method = 'lm') +
scale_x_log10() +
labs(x="ca", y = "payment", title = "Payment Vs ca")
Maps
#first prep the dataset to get the information on the map markers
payment_summary_by_loc<-dci_data_ws %>%
select(patient_id,location_id,dos_month,dos_year,payment,part_a,part_b_phys,part_b_dme,modality) %>%
group_by(location_id,dos_month,dos_year ) %>%
summarise(
total_patients = NROW(patient_id),
totalpayments = round(sum(payment),digits=2),
min_pay = min(payment),
max_pay = max(payment),
avg_pay = sum(payment)/NROW(patient_id)) %>%
ungroup() %>%
arrange(dos_year,dos_month)
#merge the grouped data with location dataset
payment_geom_summary <- merge(payment_summary_by_loc,locations,by = "location_id")
payment_geom_summary %>%
filter(location_id =='000055' & dos_month ==1 & dos_year ==2017)
#save it to rds file
saveRDS(payment_geom_summary, file = "DCI_midcourse/data/payment_geom_summary.rds")
leaflet(data= locations) %>%
addTiles() %>%
addMarkers(~longitude,
~latitude,
popup = ("hello"))
pl <- ggplot(dci_data_ws, aes(x=albumin,y=payment) ) +
geom_point(alpha=0.2) + geom_smooth(method = 'lm') +
scale_x_log10() +
scale_y_log10()
ggplotly(pl)
Transformation introduced infinite values in continuous x-axisTransformation introduced infinite values in continuous x-axisRemoved 195 rows containing non-finite values (stat_smooth).